### reduced form analysis final

## ===================================================================
## LIBRARIES AND HELPER FUNCTIONS
## ===================================================================

require(ggplot2)
require(lfe)
require(data.table)
require(stargazer)
require(lubridate)

## custom theme
theme_dan <- function(base_size=12){
  	theme_classic(base_size = base_size)+
  	##gridlines
	theme(axis.line.x = element_line(color='black'), axis.line.y=element_line(color='black')) + 
	theme(legend.position='bottom', legend.title=element_blank())  
}

proper <- function(x){gsub("(?<=\\b)([a-z])", "\\U\\1", tolower(x), perl=TRUE)}



## ===================================================================
## DATA INPUT
## ===================================================================


## SET DATA DIRECTORY HERE
## setwd(...)


## -- -- -- -- -- --
## monthly microdata
mdat <- fread('aggregate_monthly_panel.csv')		
smap <- data.table(input=c('', 'FW11', 'FW12', 'FW13', 'FW14', 'FW15', 'OUTLET', 'SS11', 'SS12', 'SS13', 'SS14', 'SS15'), 
		out=as.Date(c(NA,'2011-09-01', '2012-09-01', '2013-09-01', '2014-09-01', '2015-09-01', NA, 
			'2011-03-01', '2012-03-01', '2013-03-01', '2014-03-01', '2015-03-01'),format='%Y-%m-%d'))
mdat$season <- smap$out[match(mdat$season_1s, smap$input)]
mdat <- mdat[!is.na(mdat$season), ]

## -- -- -- -- -- --
## other data
dt <- fread('materials.csv')				## product_sku + material data (multiple rows per SKU, if multiple materials)
dt3 <- fread('natural_coding.csv')			## product_sku natural coding (one row per SKU)
erdats <- fread('erdats.csv')				## seasonal and lagged exchange rate data
usdrub <- fread('USD RUB Historical Data.csv')	## daily USD RUB exchange rate values

mdat$season <- as.Date(mdat$season)		## convert date columns to date type variables
dt$season <- as.Date(dt$season)		## materials content of each SKU
dt3$season <- as.Date(dt3$season)		## classification of each SKU
erdats$season <- as.Date(erdats$season)
usdrub$date <- as.Date(usdrub$Date, format='%b %d, %Y')

mdt <- merge(mdat, dt3, by=c('product_sku', 'brand', 'targetgroup', 'season'))		## merge materials into microdata

mdti <- mdt[, by=c('product_sku', 'brand', 'targetgroup', 'season'), 
	list(rus=rus[1], nat=nat[1], date=min(date), inv=sum(sales), cog=cog_n[1], price=price[1])]	## aggregate microdata to season level
mdti$date <- as.Date(mdti$date)
## in this dataset, the date is when the product first appears 
## mdti: seasonal data with materials and sales


## -- -- -- -- -- --
## khandelwal estimated residuals by product

fedt_all <- fread('khandelwal_residuals.csv')
fedt_all$season <- as.Date(fedt_all$season)


## ===================================================================
## DATA SUMMARY SECTION, TABLES AND GRAPHS
## ===================================================================

## SET OUTPUT DIRECTORY HERE
## setwd(...)


## ---------------------------------------------------------
## APPENDIX FIGURE A.2
## graph: within a season (e.g. SS13), what fraction of all skus introduced that season are introduced in a particular month

## graph: first purchased appearance of goods, do they appear in the right timeframe
##	mdti is already aggregated to season level, with date column = first sales appearance
mdti$dmonth <- month(mdti$date, label=T)		
mdti$smonth <- month(mdti$season)

hist1 <- mdti[season >= as.Date('2012-03-01'), by=c('smonth', 'dmonth'), length(unique(product_sku))]		## number of SKUs introduced by month of year
hist1 <- hist1[order(smonth, dmonth), ]
hist1[, by='smonth', 'tot' := sum(V1)]
hist1$share <- hist1$V1/hist1$tot
hist1$Season <- ifelse(hist1$smonth == 3, 'Spring', 'Fall')


pdf('share_new_introductions_window.pdf', height=4, width=6)
ggplot(data=NULL, aes(x=dmonth, y=share, fill=Season, group=Season, colour=Season)) + 
	geom_bar(data=hist1[Season == 'Spring', ], aes(x=dmonth, y=share), stat='identity', alpha=0.2) + 
	geom_bar(data=hist1[Season == 'Fall', ], aes(x=dmonth, y=share), stat='identity', alpha=0) +
	annotate("rect", xmin = 2.5, xmax = 8.5, ymin = 0, ymax = Inf, alpha = .2) + theme_dan() + 
	theme(axis.title.x=element_blank()) + ylab('Share of season SKUs introduced that month') 
dev.off()
## report: what share of new products in the firm's season introduced during the season we define
sum(hist1$share[hist1$Season == 'Spring' & hist1$dmonth %in% c('Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug')])
sum(hist1$share[hist1$Season == 'Fall' & !(hist1$dmonth %in% c('Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug'))])


## ---------------------------------------------------------
## APPENDIX FIGURE A.1
## graph: within a season (e.g. SS13), what fraction of all revenues earned that season are earned in a particular month

mdat$dmonth <- month(mdat$date, label=T)
mdat$smonth <- month(mdat$season)

hist2 <- mdat[season >= as.Date('2012-03-01'), by=c('smonth', 'dmonth'), sum(sales*sale_price)]
hist2[, by='smonth', 'tot' := sum(V1)]
hist2$share <- hist2$V1/hist2$tot
hist2$Season <- ifelse(hist2$smonth == 3, 'Spring', 'Fall')

pdf('share_sales_window.pdf', height=4, width=6)
ggplot(hist2) + 
	geom_rect(xmin = 2.5, xmax = 8.5, ymin = 0, ymax = Inf, alpha = .2, fill='grey90') +
	theme_dan() + 
	geom_bar(data=hist2[Season == 'Spring', ], aes(x=dmonth, y=share, fill=Season, group=Season, colour=Season), stat='identity', alpha=0.2) + 
	geom_bar(data=hist2[Season == 'Fall', ], aes(x=dmonth, y=share, fill=Season, group=Season, colour=Season), stat='identity', alpha=0) +
	theme(axis.title.x=element_blank()) + ylab('Share of season revenue earned that month')
dev.off()

sum(hist2$share[hist2$Season == 'Spring' & hist2$dmonth %in% c('Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug')])
sum(hist2$share[hist2$Season == 'Fall' & !(hist2$dmonth %in% c('Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug'))])


## ---------------------------------------------------------
## APPENDIX FIGURE A.3
## graph: sales share of goods within the 6 month window

mdat$window <- as.Date(paste(year(mdat$date), ifelse(month(mdat$date) %in% 3:8, 3, 9), '01', sep='-'))

hist3 <- mdat[season >= as.Date('2012-03-01'), by=c('season_1s', 'season', 'window'), sum(sales*sale_price)]
hist3[, by='window', 'tot' := sum(V1)]
hist3$share <- hist3$V1/hist3$tot

hist3 <- hist3[share > 0.05, ]
hist3[, by='window', 'share' := share/sum(share)]

hist3$Window <- ifelse(month(hist3$window) == 3, paste('Mar ', year(hist3$window), '-\n', 'Aug ', year(hist3$window), sep=''), 
				paste('Sep ', year(hist3$window), '-\n', 'Feb ', year(hist3$window)+1, sep=''))
hist3$Window[grepl('2016', hist3$Window)] <- 'Sep 2015-\n Sep 2015'


pdf('share_sales_seasons.pdf', height=4.5, width=7)
ggplot(data=hist3[window >= as.Date('2012-03-01'), ], aes(x=reorder(Window, window), y=share, fill=season_1s, group=season_1s)) + 
	geom_bar(stat='identity', alpha=0.6) + theme_dan() +
	theme(axis.title.x=element_blank()) + ylab('Share of revenue') + scale_fill_brewer(palette="Accent") + 
	labs(fill='Season')
dev.off()


## ---------------------------------------------------------
## APPENDIX TABLE A.1
## mapping from fabric to quality, and prevalence of blends

#dtt <- dt3[product_sku %in% mdti$product_sku, ]
dtt <- dt[product_sku %in% mdti$product_sku, ]
dtt <- dtt[!is.na(mat), ]

tab3 <- dtt[mat != '', ]
tab3$mat[tab3$mat %in% c('elastane', 'spandex', 'lycra')] <- 'elastane'
tab3[, by=.(mat), 'nat' := 1 - 1*(('polymer' %in% mat) | grepl('artificial', mat) |
			(grepl('polyester', mat)) | (grepl('acrylic', mat)) | (grepl('acetate', mat)) | 
			(grepl('synth', mat)) | (grepl('lurex', mat)) | (grepl('nylon', mat)) )]
tab3[, by=.(product_sku), 'blend' := 1*(.N > 1)]

tab3 <- tab3[, by='mat', list(nat=nat[1], 
				nskus=length(unique(product_sku)), 
				blends = length(unique(product_sku[blend > 0]))/length(unique(product_sku)) )]
tab3 <- tab3[order(-nskus), ]

(length(unique(dtt$product_sku[dtt$mat %in% tab3$mat[1:30]]))/length(unique(dtt$product_sku)))		## 97% of SKUs
#tab3a <- dtt[, by='product_sku', sum(!(mat %in% tab3$mat[1:30]))]

## cleaning
tab3 <- tab3[1:28, ]
tab3$mat <- proper(tab3$mat)
tab3a <- tab3[, .(nat, mat, nskus, blends)]
tab3a$nat[tab3a$mat %in% c('Elastane', 'Spandex', 'Lycra')] <- -1
tab3a <- tab3a[order(-nat, -nskus), ]

## new table A.1. 
##	modifications needed after put into paper: add in \hline between categories
setnames(tab3a, c('Quality', 'Material', 'Num. SKUs', 'Blend Fraction'))
tab3a$Quality[match(c(1,0,-1), tab3a$Quality)] <-  c('High', 'Low', 'Dropped')
tab3a$Quality[!(tab3a$Quality %in% c('High', 'Low', 'Dropped'))] <- NA

stargazer(tab3a, summary=F, rownames=F)


## ---------------------------------------------------------
## APPENDIX TABLE A.2
## material breakout by targetgroup, top three fabrics
##	include blends

dtt <- dt[product_sku %in% mdti$product_sku, ]
dtt <- dtt[!is.na(mat), ]

tab4 <- dtt
tab4 <- merge(tab4, mdat[, by=.(product_sku), list(cog=cog_n[1])], by=c('product_sku'), all.x=T)

temp <- erdats[, .(season, price)]
temp$deflate <- temp$price/temp$price[temp$season == as.Date('2014-03-01')]

tab4 <- merge(tab4, temp[, .(season, deflate)], by='season')

tab4[, by=.(mat), 'nat' := 1 - 1*(('polymer' %in% mat) | grepl('artificial', mat) |
			(grepl('polyester', mat)) | (grepl('acrylic', mat)) | (grepl('acetate', mat)) | 
			(grepl('synth', mat)) | (grepl('lurex', mat)) | (grepl('nylon', mat)) )]

tab4[, by=.(targetgroup), 'totskus' := list(length(unique(product_sku)))]
tab4 <- tab4[!(mat %in% c('elastane', 'lycra', 'spandex', '')), ]			## drop stretchy stuff

tab4 <- tab4[, by=.(targetgroup, product_sku), list(mat = paste(sort(mat), collapse='/'), high=max(nat), totskus=totskus[1], cog=cog[1]/deflate[1])]
tab4 <- tab4[, by=.(targetgroup, mat), list(high=high[1], nskus=length(unique(product_sku)), totskus=totskus[1], cog=mean(cog, na.rm=T))]
tab4 <- tab4[order(targetgroup, -nskus), ]
tab4 <- tab4[, by=.(targetgroup), list(mat=mat[1:3], high=high[1:3], nskus=nskus[1:3], totskus=totskus[1], cog=cog[1:3])]

tab4$share <- round(tab4$nskus/tab4$totskus,2)
tab4$targetgroup <- proper(gsub('&', 'AND', tab4$targetgroup))
tab4$mat <- proper(tab4$mat)
tab4  <- tab4[order(high, decreasing=F), ]
tab4$high <- factor(ifelse(tab4$high == 1, 'High Quality', 'Low Quality'), levels=c('Low Quality', 'High Quality'))


tab4a <- tab4[order(targetgroup, -nskus), .(targetgroup, mat, high, share)]
setnames(tab4a, c('Group', 'Material', 'Quality', 'Share'))
tab4a$Quality <- ifelse(tab4a$Quality == 'High Quality', 'High', 'Low')
tab4a <- cbind(tab4a[1:(nrow(tab4a)/2), .(Group, Material, Quality, Share)], 
			tab4a[(nrow(tab4a)/2 + 1):nrow(tab4a), .(Group, Material, Quality, Share)])
tab4a[[1]][c(seq(2,39,3), seq(3,39,3))] <- NA
tab4a[[5]][c(seq(2,39,3), seq(3,39,3))] <- NA

stargazer(tab4a, float.env='sidewaystable', summary=F, row.names=F)


## ---------------------------------------------------------
## TABLE 1 IN THE PAPER
##	cross-sectional summary statistics by product group

ss1 <- mdti[, by='targetgroup', list(nskus=length(unique(product_sku)), 
		quality = mean(nat), rus=mean(rus))]
ss1$share <- ss1$nskus/sum(ss1$nskus)
ss1 <- ss1[, c('targetgroup', 'share', 'quality', 'rus'), with=F]
ss1 <- ss1[order(targetgroup), ]
setnames(ss1, c('Group', 'Share', 'Quality', 'Rus.'))
ss1$Group <- proper(ss1$Group)
ss1$Group <- gsub('&', 'and', ss1$Group)

ss1 <- cbind(ss1[1:13, ], ss1[14:26])
stargazer(ss1, summary=F, rownames=F)


## ---------------------------------------------------------
## TABLE 2 IN THE PAPER
## summary statistics by season

ss2 <- mdti[, by='season', list(highfrac=mean(nat), nskus=length(unique(product_sku)), 
		sales=sum(inv), price=mean(price), cog=mean(cog, na.rm=T))]
ss2 <- ss2[order(season) & season >= as.Date('2012-03-01'), ]
ss2 <- merge(ss2, erdats[, c('season', 'price'), with=F], by='season')
setnames(ss2, c('Season', 'Quality', 'Num. SKUs', 'Quantity', 'Price', 'Raw Cost', 'Avg. RUB/USD'))
ss2$Price <- as.integer(round(ss2$Price))
ss2[[6]] <- as.integer(round(ss2[[6]]))

stargazer(ss2, summary=F, rownames=F)


## ---------------------------------------------------------
## FIGURE 1 IN THE PAPER

## daily exchange rate data
erddt <- usdrub
erddt$er <- erddt$Price
erddt<- erddt[date >= as.Date('2012-03-01') & date < as.Date('2015-09-01') & !is.na(date), .(date, er)]
erddt$er <- as.numeric(erddt$er)
erddt$norm_er <- 100*erddt$er/erddt$er[erddt$date == min(erddt$date)]
erddt$label <- 'Exchange rate'



ff1 <- mdat[season >= as.Date('2012-03-01') & season <= as.Date('2015-09-01') & orders < quantile(orders,na.rm=T,0.99), 
		by=.(season, product_sku), 
		list(cog_n = max(cog_n,na.rm=T), orders = sum(orders,na.rm=T))]
ff1 <- ff1[, by=.(season), list(mean(cog_n[cog_n > -Inf],na.rm=T), sum(cog_n[cog_n > -Inf]*orders[cog_n > -Inf],na.rm=T)/sum(orders[cog_n > -Inf],na.rm=T))]
ff1 <- melt(ff1, id.vars = 'season', measure.vars=c('V1', 'V2'))
ff1$value[ff1$season == max(ff1$season)] <- NA 
ff1$variable <- ifelse(ff1$variable == 'V1', 'Mean wholesale cost', 'Inventory-weighted mean wholesale cost')

p <- ggplot() + geom_step(data = ff1, mapping = aes(x = season, y = value, linetype = variable, group = variable)) + 
	scale_linetype_manual(values = c('solid', 'dotdash', 'longdash')) + 
	geom_line(data = erddt, mapping = aes(x = date, y =  er*28, linetype = label)) + 
	scale_y_continuous(name = 'Cost of goods sold (nominal rubles)', sec.axis = sec_axis(~./28, name = 'Rubles per USD')) + 
	xlab('') + theme_dan()

pdf('data_exchange_rates_cogs.pdf', height=4, width=6)
p
dev.off()



## ===================================================================
## REDUCED FORM ANALYSIS - PROFIT/OUTCOME DIFFERENTIALS FOR HIGH VS LOW QUALITY
## ===================================================================

## ex ante profit, revenue, price, cost, and quantity of high versus low quality products
##	high quality stuff is MORE PROFITABLE, great
##	idea: it's more profitable (as in the quality sorting literatures), but 
##		also dropped more quickly (ensuring incomplete pass through)

pcs <- mdt[season <= as.Date('2014-09-01'), 
	by=c('product_sku', 'targetgroup'), list(pi = sum((price - cog_n)*sales), nat=nat[1], brand=brand[1], rev=sum(price*sales), 
			p=sum(price*sales)/sum(sales), q=sum(sales), cog_n = sum(cog_n*sales)/sum(sales), 
			mu=sum(sales*price/cog_n)/sum(sales), mu_1 = price[date == min(date)]/cog_n[date == min(date)], season=season[1])]
##pcs[, by='targetgroup', list(mean(pi[nat == 1],na.rm=T) - mean(pi[nat == 0], na.rm=T), mean(p[nat==1],na.rm=T) - mean(p[nat==0], na.rm=T))]
pcs$tgb <- paste(pcs$targetgroup, pcs$brand, sep='_')
pcs$tgs <- paste(pcs$targetgroup, pcs$season, sep='_')
pcs$tgbs <- paste(pcs$targetgroup, pcs$brand, pcs$season, sep='_')


pcs$lpi <- log(pcs$pi)
pcs$lrev <- log(pcs$rev)
pcs$lq <- log(pcs$q)
pcs$lp <- log(pcs$p)
pcs$lcog_n <- log(pcs$cog_n)
pcs$lmu <- log(pcs$mu)

## ---------------------------------------------------------
## TABLE 3 IN PAPER
## profit regression with targetgroup X season fixed effects
##	higher profit for higher qualities

pcs.fe1c <- felm(lpi ~ nat | tgs | 0 | tgs , data=pcs[lpi > -Inf & lcog_n > -Inf ,])
pcs.fe2c <- felm(lrev ~ nat | tgs   | 0 | tgs , data=pcs[lpi > -Inf & lcog_n > -Inf ,])
pcs.fe3c <- felm(lq ~ nat | tgs  | 0 | tgs, data=pcs[lpi > -Inf & lcog_n > -Inf ,])
pcs.fe4c <- felm(lp ~ nat | tgs  | 0 | tgs, data=pcs[lpi > -Inf & lcog_n > -Inf ,])
pcs.fe5c <- felm(lcog_n ~ nat | tgs  | 0 | tgs, data=pcs[lpi > -Inf & lcog_n > -Inf ,])
pcs.fe6c <- felm(lmu ~ nat | tgs | 0 | tgs, data=pcs[lpi > -Inf & lcog_n > -Inf, ])

stargazer(pcs.fe1c, pcs.fe2c, pcs.fe3c, pcs.fe4c, pcs.fe5c, pcs.fe6c, 
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(c('Group $\\times$ Season FE',  '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark')))


## ---------------------------------------------------------
## TABLE B.1 IN PAPER
## profit regression with targetgroup X brand X season fixed effects
##	higher profit for higher qualities

pcs.fe1 <- felm(lpi ~ nat | tgbs | 0 | tgbs, data=pcs[lpi > -Inf & lcog_n > -Inf, ])
pcs.fe2 <- felm(lrev ~ nat | tgbs  | 0 | tgbs, data=pcs[lpi > -Inf & lcog_n > -Inf ,])
pcs.fe3 <- felm(lq ~ nat | tgbs | 0 | tgbs, data=pcs[lpi > -Inf & lcog_n > -Inf ,])
pcs.fe4 <- felm(lp ~ nat | tgbs | 0 | tgbs, data=pcs[lpi > -Inf & lcog_n > -Inf ,])
pcs.fe5 <- felm(lcog_n ~ nat | tgbs| 0 | tgbs, data=pcs[lpi > -Inf & lcog_n > -Inf ,])
pcs.fe6 <- felm(lmu ~ nat | tgbs| 0 | tgbs, data=pcs[lpi > -Inf & lcog_n > -Inf ,])

stargazer(pcs.fe1, pcs.fe2, pcs.fe3, pcs.fe4, pcs.fe5, pcs.fe6, 
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(c('Brand $\\times$ Group $\\times$ Season FE',  '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark') 
					))

## ===================================================================
## REDUCED FORM ANALYSIS - DOWNGRADING
## ===================================================================


## ---------------------------------------------------------
## FIGURE 2 IN PAPER
## diff-in-diff coefficient plot for timing check (season level)
## 	no exchange rate coefficient, just time dummies

ee2b <- mdti[, by=c('season', 'rus', 'targetgroup'), list(mean(nat), sum(inv*nat)/sum(inv))]
ee2b$nonrus <- 1 - ee2b$rus
ee2b$tgnr <- paste(ee2b$targetgroup, ee2b$nonrus)
ee2b$tgs <- paste(ee2b$targetgroup, ee2b$season)
ee2b$fseason <- as.factor(ee2b$season)
ee2b <- ee2b[season >= as.Date('2011-03-01') & season <= as.Date('2015-09-01'), ]

fe2b <- felm(V1 ~ fseason:nonrus | fseason + tgnr | 0 | tgnr, 
		data=ee2b[season >= as.Date('2011-03-01') & season <= as.Date('2015-09-01') , ])
seasons <- sort(unique(ee2b$season))
ee2bp <- data.table(season=seasons, coef=coef(fe2b), se=fe2b$cse)

ee2b$s1 <- 1*(ee2b$season == as.Date('2012-03-01'))
ee2b$s2 <- 1*(ee2b$season == as.Date('2012-09-01'))
ee2b$s3 <- 1*(ee2b$season == as.Date('2013-03-01'))
ee2b$s4 <- 1*(ee2b$season == as.Date('2013-09-01'))
ee2b$s5 <- 1*(ee2b$season == as.Date('2014-03-01'))
ee2b$s6 <- 1*(ee2b$season == as.Date('2014-09-01'))
ee2b$s7 <- 1*(ee2b$season == as.Date('2015-03-01'))
ee2b$s8 <- 1*(ee2b$season == as.Date('2015-09-01'))
fe2c <- felm(V1 ~ nonrus:s2 + nonrus:s3 + nonrus:s4 + nonrus:s5 + nonrus:s6 + nonrus:s7 + nonrus:s8 | tgs + tgnr | 0 | tgnr, 
		data=ee2b[season >= as.Date('2012-03-01') & season <= as.Date('2015-09-01'), ])
ee2cp <- data.table(season=seasons[seasons >= as.Date('2012-03-01')], coef=c(0, coef(fe2c)),se=c(NA, fe2c$cse))


## OUTPUT
pdf('REStat_downgrading_timing.pdf', height=4, width=6)
ggplot(data=ee2cp[season >= as.Date('2012-03-01'), ], aes(x=season, y=coef)) + 
		geom_errorbar(aes(ymin=coef-1.96*se, ymax=coef+1.96*se), width=20) + geom_point() + 
		theme_dan() + theme(axis.title.x=element_blank()) + ylab('Coefficient') + 
		geom_hline(yintercept=0, linetype='dashed', col='black', alpha=0.5)
dev.off()


## ---------------------------------------------------------
## diff-in-diff regressions for EXCHANGE RATE COEFFICIENT
## 	no time dummies, just lagged exchange rate coefficient

ee3b <- mdti[, by=c('season', 'rus', 'targetgroup'), 
		list(mean(nat), sum(inv*nat)/sum(inv), 
			(1 + sum(nat==1))/(1 + sum(nat==0)), (1 + sum(inv[nat==1]))/(1 + sum(inv[nat==0])))]
ee3b <- merge(ee3b, erdats, by='season')
ee3b$nonrus <- 1 - ee3b$rus
ee3b$tgnr <- paste(ee3b$targetgroup, ee3b$nonrus)
ee3b$tgs <- paste(ee3b$targetgroup, ee3b$season, sep='-')		
ee3b$tgnr_soy <- paste(ee3b$targetgroup, ee3b$nonrus, month(ee3b$season), sep='-')

 
## -- -- -- -- -- --
## TABLE 4 IN PAPER
## downgrading regressions
## 	for natfrac (Nh/(Nh + Nl)) and for log(Nh/Nl) (structurally motivated object) (add one to numerator and denominator for valid fractions)

fe3b <- felm(V1 ~ nonrus:log(l1) | tgnr + season | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01'), ])
fe3b2 <- felm(V1 ~ nonrus:log(l1) | tgnr + tgs | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01'), ])
fe3b3 <- felm(log(V3) ~ nonrus:log(l1) | tgnr + season | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01'), ])
fe3b4 <- felm(log(V3) ~ nonrus:log(l1) | tgnr + tgs | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01'), ])

stargazer(fe3b, fe3b2, fe3b3, fe3b4, type='text')

## OUTPUT
stargazer(fe3b, fe3b2, fe3b3, fe3b4,
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(	c('Group $\\times$ Origin FE', '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark'),
					c('Season FE', '\\checkmark', '', '\\checkmark', ''),
					c('Group $\\times$ Season FE', '', '\\checkmark', '', '\\checkmark')
					))


## -- -- -- -- -- --
## APPENDIX TABLE B.2 
## 	BASELINE RESULTS ROBUSTNESS: trim final season

fe3b.t <- felm(V1 ~ nonrus:log(l1) | tgnr + season | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season < as.Date('2015-09-01'), ])
fe3b2.t <- felm(V1 ~ nonrus:log(l1) | tgnr + tgs | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season < as.Date('2015-09-01'), ])
fe3b3.t <- felm(log(V3) ~ nonrus:log(l1) | tgnr + season | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season < as.Date('2015-09-01'), ])
fe3b4.t <- felm(log(V3) ~ nonrus:log(l1) | tgnr + tgs | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season < as.Date('2015-09-01'), ])

stargazer(fe3b.t, fe3b2.t, fe3b3.t, fe3b4.t, type='text')

## OUTPUT
stargazer(fe3b.t, fe3b2.t, fe3b3.t, fe3b4.t,
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(	c('Group $\\times$ Origin FE', '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark'),
					c('Season FE', '\\checkmark', '', '\\checkmark', ''),
					c('Group $\\times$ Season FE', '', '\\checkmark', '', '\\checkmark')
					))


## -- -- -- -- -- --
## APPENDIX TABLE B.3
## 	BASELINE RESULTS ROBUSTNESS: inventory weightings

fe3b.iw <- felm(V2 ~ nonrus:log(l1) | tgnr + season | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01'), ])
fe3b2.iw <- felm(V2 ~ nonrus:log(l1) | tgnr + tgs | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01'), ])
fe3b3.iw <- felm(log(V4) ~ nonrus:log(l1) | tgnr + season | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01'), ])
fe3b4.iw <- felm(log(V4) ~ nonrus:log(l1) | tgnr + tgs | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01'), ])

stargazer(fe3b.iw, fe3b2.iw, fe3b3.iw, fe3b4.iw, type='text')

## OUTPUT 
stargazer(fe3b.iw, fe3b2.iw, fe3b3.iw, fe3b4.iw,
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(	c('Group $\\times$ Origin FE', '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark'),
					c('Season FE', '\\checkmark', '', '\\checkmark', ''),
					c('Group $\\times$ Season FE', '', '\\checkmark', '', '\\checkmark')
					))

## -- -- -- -- -- --
## APPENDIX TABLE B.4
## 	BASELINE RESULTS ROBUSTNESS: season-of-year dummies with tgnr

fe3b.soy <- felm(V1 ~ nonrus:log(l1) | tgnr_soy + season | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season < as.Date('2015-09-01'), ])
fe3b2.soy <- felm(V1 ~ nonrus:log(l1) | tgnr_soy + tgs | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season < as.Date('2015-09-01'), ])
fe3b3.soy <- felm(log(V3) ~ nonrus:log(l1) | tgnr_soy + season | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season < as.Date('2015-09-01'), ])
fe3b4.soy <- felm(log(V3) ~ nonrus:log(l1) | tgnr_soy + tgs | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season < as.Date('2015-09-01'), ])

stargazer(fe3b.soy, fe3b2.soy, fe3b3.soy, fe3b4.soy, type='text')

## OUTPUT
stargazer(fe3b.soy, fe3b2.soy, fe3b3.soy, fe3b4.soy,
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(	c('Group $\\times$ Origin $\\times$ Season-of-Year FE', '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark'),
					c('Season FE', '\\checkmark', '', '\\checkmark', ''),
					c('Group $\\times$ Season FE', '', '\\checkmark', '', '\\checkmark')
					))


## -- -- -- -- -- --
## APPENDIX TABLE B.5
## 	BASELINE RESULTS ROBUSTNESS: logged dependent variable log(natfrac)

fe3b.lnf <- felm(log(V1) ~ nonrus:log(l1) | tgnr + season | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01') & log(V1) > -Inf, ])
fe3b2.lnf <- felm(log(V1) ~ nonrus:log(l1) | tgnr + tgs | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01') & log(V1) > -Inf, ])

stargazer(fe3b.lnf, fe3b2.lnf, type='text')

## OUTPUT
stargazer(fe3b.lnf, fe3b2.lnf,
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(	c('Group $\\times$ Origin FE', '\\checkmark', '\\checkmark'),
					c('Season FE', '\\checkmark', ''),
					c('Group $\\times$ Season FE', '', '\\checkmark')
					))

## -- -- -- -- -- --
## APPENDIX TABLE B.6
## 	BASELINE RESULTS ROBUSTNESS: aggregated and disaggregated results

## WITHIN ORIGIN REGRESSION
##	take mdti to rus/nonrus-season level
##	nonrussian and season level fixed effects only, 2 x num(seasons) observations

ee3d <- mdti[, by=c('season', 'rus'), list(mean(nat), sum(inv*nat)/sum(inv), 
				(1 + sum(nat==1))/(1 + sum(nat==0)), (1 + sum(inv[nat==1]))/(1 + sum(inv[nat==0])))]
ee3d$date <- ee3d$season
ee3d <- merge(ee3d, erdats, by='season')
ee3d$nonrus <- 1 - ee3d$rus

fe3d <- felm(V1 ~ nonrus:log(l1) | rus + season | 0 | 0, data=ee3d[season >=as.Date('2012-01-01') & season <= as.Date('2015-09-01'), ])
## alternate dependent variable
fe3d2 <- felm(log(V3) ~ nonrus:log(l1) | rus + season | 0 | 0, data=ee3d[season >=as.Date('2012-01-01') & season <= as.Date('2015-09-01'), ])


## WITHIN BRAND REGRESSION
##	take mdti to brand-targetgroup-rus-season level

ee3c <- mdti[, by=c('season', 'rus', 'targetgroup', 'brand'), list(mean(nat), sum(inv*nat)/sum(inv), 
				(1 + sum(nat==1))/(1 + sum(nat==0)), (1 + sum(inv[nat==1]))/(1 + sum(inv[nat==0])))]
ee3c$date <- ee3c$season
ee3c <- merge(ee3c, erdats, by='season')
ee3c$nonrus <- 1 - ee3c$rus
ee3c$tgbnr <- paste(ee3c$targetgroup, ee3c$brand, ee3c$nonrus)
ee3c$tgbs <- paste(ee3c$targetgroup, ee3c$brand, ee3c$season, sep='-')		## can do tgd or just date, both work

fe3c <- felm(V1 ~ nonrus:log(l1) | tgbnr + tgbs | 0 | tgbnr, data=ee3c[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01'), ])
## alternate dependent variable
fe3c2 <- felm(log(V3) ~ nonrus:log(l1) | tgbnr + tgbs | 0 | tgbnr, data=ee3c[season >= as.Date('2012-01-01') & season < as.Date('2015-09-01'), ])


## AGG AND DISAGG TOGETHER
stargazer(fe3d, fe3c, fe3d2, fe3c2, type='text')


## OUTPUT
stargazer(fe3d, fe3c, fe3d2, fe3c2,
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(	c('Origin FE', '\\checkmark', '', '\\checkmark', ''),
					c('Season FE', '\\checkmark', '', '\\checkmark', ''),
					c('Group $\\times$ Origin $\\times$ Brand FE', '', '\\checkmark', '', '\\checkmark'),
					c('Group $\\times$ Season $\\times$ Brand FE', '', '\\checkmark', '', '\\checkmark')
					))



## -- -- -- -- -- --
## APPENDIX TABLE B.7
## BASELINE RESULTS ROBUSTNESS: trim two last seasons. referee's suggestion
##	result: coefficients are not so different from regression that includes devaluation, but significance is lower
##	role of devaluation is to increase precision of estimates through increased variation in the X variable

fe3b.rt <- felm(V1 ~ nonrus:log(l1) | tgnr + season | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season <= as.Date('2014-09-01'), ])
fe3b2.rt <- felm(V1 ~ nonrus:log(l1) | tgnr + tgs | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season <= as.Date('2014-09-01'), ])
fe3b3.rt <- felm(log(V3) ~ nonrus:log(l1) | tgnr + season | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season <= as.Date('2014-09-01'), ])
fe3b4.rt <- felm(log(V3) ~ nonrus:log(l1) | tgnr + tgs | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season <= as.Date('2014-09-01'), ])

stargazer(fe3b.rt, fe3b2.rt, fe3b3.rt, fe3b4.rt, type='text')

## OUTPUT
stargazer(fe3b.rt, fe3b2.rt, fe3b3.rt, fe3b4.rt,
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(	c('Group $\\times$ Origin FE', '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark'),
					c('Season FE', '\\checkmark', '', '\\checkmark', ''),
					c('Group $\\times$ Season FE', '', '\\checkmark', '', '\\checkmark')
					))


## ---------------------------------------------------------
## FIGURE B.1 AND TABLE B.8
## BASELINE RESULTS ROBUSTNESS: targetgroup specific downgrading coefficients
##	season x targetgroup x source level analysis

fe4 <- felm(V1 ~ log(l1):nonrus:targetgroup | tgs + tgnr | 0 | tgs + tgnr, 
		data=ee3b[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01'), ])
fe4b <- felm(log(V3) ~ log(l1):nonrus:targetgroup | tgs + tgnr | 0 | tgs + tgnr, 
		data=ee3b[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01'), ])

ee4 <- mdti[date >= as.Date('2012-01-01') & date <= as.Date('2014-06-01'), by=c('targetgroup', 'nat'), list(mean(cog, na.rm=T))]
ee4 <- ee4[order(targetgroup, nat), ]
ee4 <- ee4[, by='targetgroup', list(V1[nat==1]/V1[nat==0])]
ee4$coef <- coef(fe4)
ee4$sig <- 1*(abs(ee4$coef/fe4$cse) >= 1.96)
ee4$Significant <- ifelse(ee4$sig == 1, 'Significant (p < 0.05)', 'Not Significant')


## OUTPUT
## FIGURE 4
p <- ggplot(data=ee4, aes(x=V1, y=coef, shape=Significant, group=Significant)) + geom_point(size=2) + theme_dan() + xlab('Cost Ratio') + ylab('Quality Downgrading Coef.') + 
	geom_hline(yintercept=0, linetype='dashed', color='red') + theme(legend.position='bottom') + 
	scale_shape_manual(values=c(1, 3))
ggsave('downgrading_cost_nonylon.pdf', p, height=4, width=6)
dev.off()

## TABLE B.8

ee4a <- ee4[, -c('sig', 'Significant'), with=F]
ee4a$se <- fe4$cse
##ee4a$pval <- 2*(1 - pnorm(abs(coef(fe4))/fe4$cse))

ee4a$coefb <- coef(fe4b)
ee4a$seb <- fe4b$cse

ee4a$targetgroup <- proper(gsub('&', 'and', ee4a$targetgroup))
setnames(ee4a, c('Group', 'Quality Cost', 'Coef.', 'SE', 'Coef.', 'SE'))

stargazer(ee4a, summary=F, rownames=F)




## ===================================================================
## QUALITY DOWNGRADING ROBUSTNESS
## ===================================================================

## ---------------------------------------------------------
## raw numbers of SKUs, is there a differential decrease for foreign sourced natural SKUs?
## 	at the targetgroup level, non-Russian products

rob1 <- mdti[, by=c('season', 'rus', 'nat', 'targetgroup'), list(N=length(unique(product_sku)))]
rob1 <- merge(rob1, erdats, by='season')
rob1$nonrus <- 1 - rob1$rus

## observation: (season, origin, natural) + targetgroup
## within targetgroup: nonrus:nat, nonrus:season, nat:season

rob1$tgnrn <- paste(rob1$targetgroup, rob1$nonrus, rob1$nat)
rob1$tgnrs <- paste(rob1$targetgroup, rob1$nonrus, rob1$season)
rob1$tgns <- paste(rob1$targetgroup, rob1$nat, rob1$season)

## at the targetgroup level, no control group
rob1$tgn <- paste(rob1$targetgroup, rob1$nat)
rob1$tgs <- paste(rob1$targetgroup, rob1$season)
rob1$tgm <- paste(rob1$targetgroup, month(rob1$season), sep='-')
rob1$tgnm <- paste(rob1$targetgroup, rob1$nat, month(rob1$season), sep='-')
rob1$t <- match(rob1$season, sort(unique(rob1$season)))

## -- -- -- -- -- --
## APPENDIX TABLE B.10
## QUALITY DOWNGRADING ROBUSTNESS, RAW SKUs, regular FE
##	raw numbers of SKUs, differential decrease for foreign sourced natural SKUs


rfe1a <- felm(log(N) ~ log(l1):nat | tgn + season | 0 | tgn, 
			data=rob1[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01') & nonrus == 1, ])
rfe1b <- felm(log(N) ~ log(l1):nat | tgn + tgs | 0 | tgn, 
			data=rob1[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01') & nonrus == 1, ])

rfe1a.t <- felm(log(N) ~ log(l1):nat | tgn + season  | 0 | tgn, 
			data=rob1[season >= as.Date('2012-01-01') & season < as.Date('2015-09-01') & nonrus == 1, ])
rfe1b.t <- felm(log(N) ~ log(l1):nat | tgn + tgs | 0 | tgn, 
			data=rob1[season >= as.Date('2012-01-01') & season < as.Date('2015-09-01') & nonrus == 1, ])


stargazer(rfe1a, rfe1b, rfe1a.t, rfe1b.t, type='text')

## OUTPUT
stargazer(rfe1a, rfe1b, rfe1a.t, rfe1b.t,
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(	c('Group $\\times$ Quality FE', '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark'),
					c('Season FE', '\\checkmark', '', '\\checkmark', ''), 
					c('Group $\\times$ Season FE', '', '\\checkmark', '', '\\checkmark')))

## -- -- -- -- -- --
## APPENDIX TABLE B.11
## QUALITY DOWNGRADING ROBUSTNESS, RAW SKUs, season of year FE interactions

rfe2a <- felm(log(N) ~ log(l1):nat | tgnm + season | 0 | tgn, 
			data=rob1[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01') & nonrus == 1, ])
rfe2b <- felm(log(N) ~ log(l1):nat | tgnm + tgs | 0 | tgn, 
			data=rob1[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01') & nonrus == 1, ])

rfe2a.t <- felm(log(N) ~ log(l1):nat | tgnm + season  | 0 | tgn, 
			data=rob1[season >= as.Date('2012-01-01') & season < as.Date('2015-09-01') & nonrus == 1, ])
rfe2b.t <- felm(log(N) ~ log(l1):nat | tgnm + tgs | 0 | tgn, 
			data=rob1[season >= as.Date('2012-01-01') & season < as.Date('2015-09-01') & nonrus == 1, ])


stargazer(rfe2a, rfe2b, rfe2a.t, rfe2b.t, type='text')

## OUTPUT
stargazer(rfe2a, rfe2b, rfe2a.t, rfe2b.t,
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(	c('Group $\\times$ Quality $\\times$ SoY FE', '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark'),
					c('Season FE', '\\checkmark', '', '\\checkmark', ''), 
					c('Group $\\times$ Season FE', '', '\\checkmark', '', '\\checkmark')))




## ------------------------------------------------------------------
## APPENDIX TABLE B.9
## QUALITY DOWNGRADING ROBUSTNESS, Khandelwal (2010) residual analysis
##	need to input the fixed effect information from the demand regressions
## 	residual analysis using khandelwal (2010) regression that also features differential elasticities

rob3c <- mdti
rob3c <- merge(rob3c, fedt_all, by=c('product_sku', 'season', 'targetgroup'), all.x=T)
rob3c <- rob3c[, by=c('season', 'rus', 'targetgroup'), list(
				linfe_c=mean(linfe_c,na.rm=T), 
				logfe_c=mean(logfe_c,na.rm=T))]

rob3c <- merge(rob3c, erdats, by='season')
rob3c$nonrus <- 1 - rob3c$rus
rob3c$tgnr <- paste(rob3c$targetgroup, rob3c$nonrus)
rob3c$tgs <- paste(rob3c$targetgroup, rob3c$season, sep='-')


rfe3c.1a <- felm(linfe_c ~ nonrus:log(l1) | tgnr + season | 0 | tgnr, 
	data=rob3c[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01'), ])
rfe3c.1b <- felm(linfe_c ~ nonrus:log(l1) | tgnr + tgs | 0 | tgnr , 
	data=rob3c[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01'), ])

rfe3c.2a <- felm(logfe_c ~ nonrus:log(l1) | tgnr + season | 0 | tgnr, 
	data=rob3c[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01'), ])
rfe3c.2b <- felm(logfe_c ~ nonrus:log(l1) | tgnr + tgs | 0 | tgnr, 
	data=rob3c[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01'), ])


## OUTPUT
stargazer(rfe3c.1a, rfe3c.1b, rfe3c.2a, rfe3c.2b, 
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(	c('Group $\\times$ Origin FE', '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark'),
					c('Season FE', '\\checkmark', '', '\\checkmark', ''),
					c('Group $\\times$ Season FE', '', '\\checkmark', '', '\\checkmark')
					))


## -- -- -- -- -- -- -- -- -- --
## APPENDIX FIGURE B.1
## QUALITY DOWNGRADING ROBUSTNESS, Khandelwal (2010) residual analysis, heterogeneity by targetgroup
##	correlation with baseline heterogeneous targetgroup estimates

rfe3c.het <- felm(linfe_c ~ nonrus:log(l1):targetgroup | tgnr + tgs | 0 | 0, 
	data=rob3c[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01'), ])
rob3c.het <- data.table(ee4$targetgroup, base=coef(fe4), khandelwal=coef(rfe3c.het))

## OUTPUT
p <- ggplot(rob3c.het, aes(x=base, y=khandelwal)) + geom_point(size=2) + theme_dan() + xlab('Baseline Heterogeneous Coefficients') + 
	ylab('Khandelwal Heterogeneous Coefficients') + 
	geom_hline(yintercept = 0, linetype='dashed', color='red') + geom_vline(xintercept = 0, linetype='dashed', color='red')
ggsave('robustness_hetcoef_base_vs_khandelwal_nonylon.pdf', p, height=4, width=6)

## correlation of 0.34
round(cor(rob3c.het$base, rob3c.het$khandelwal), 2)


## ------------------------------------------------------------------
## APPENDIX FIGURE B.2: POLYMER PRESENCE BY MANUFACTURING ORIGIN
##	diff-in-diff between russian polymer frac and imported polymer frac

vv <- dt[season >= as.Date('2012-03-01'), by=c('season', 'rus'), sum(grepl('polymer', mat), na.rm=T)/sum(!is.na(mat))]
vv$Country <- ifelse(vv$rus == 1, 'Domestic', 'Imported')

pdf('polymers.pdf', height=4, width=6)
ggplot(vv, aes(x=season, y=V1, color=Country, group=Country, linetype=Country)) + geom_line() + theme_dan() +
	ylab('Total Polymer Fraction') +
	theme(legend.title=element_blank(), axis.title.x=element_blank()) + 
	scale_linetype_manual(values=c("dashed", "solid"))
dev.off()



## ===================================================================
## REDUCED FORM - PRICE PASS-THROUGH ANALYSIS
## ===================================================================

## OUTPUT FROM THIS ANALYSIS
##	show that there is no differential pass-through for natural vs. non-natural products, implying that
##	reallocation towards lower quality products is not due to a contraction in markups

## ---------------------------------------------------------
## PASS-THROUGH

pt1 <- mdti[, by=c('season', 'brand', 'targetgroup', 'nat', 'rus', 'product_sku'), 
			list(price=price[1], cog=cog[1], inv=inv[1], pi=sum((price - cog)*inv), rev=sum(price*inv))]
pt1$date <- pt1$season
pt1$nonrus <- 1 - pt1$rus

pt1 <- merge(pt1, erdats[,-'price',with=F], by=c('season'))
pt1$tgn <- paste(pt1$targetgroup, pt1$nat, sep='-')
pt1$tgd <- paste(pt1$targetgroup, pt1$date, sep='-')
pt1$tgnr <- paste(pt1$targetgroup, pt1$nonrus, sep='-')
pt1$tgns <- paste(pt1$targetgroup, pt1$nat, month(pt1$date), sep='-')

pt1$tgbn <- paste(pt1$targetgroup, pt1$brand, pt1$nat, sep='-')
pt1$tgbd <- paste(pt1$targetgroup, pt1$brand, pt1$date, sep='-')		## fixed effect tgbd would concentrate out log(l1)
pt1$tgbnr <- paste(pt1$targetgroup, pt1$brand, pt1$nonrus, sep='-')
pt1$tgbns <- paste(pt1$targetgroup, pt1$brand, pt1$nat, month(pt1$date), sep='-')


## ------------------------------------------------------------------
## TABLE 5 
##	baseline price pass-through regressions
##		variation is within targetgroup-brand-natural-moy, targetgroup-brand-nonrussian

## imports only
ptfe1.1a.nr <- felm(log(price) ~ log(l1):nat + log(l1)  | tgbns | 0 | tgbns , 
	data=pt1[date >= as.Date('2012-01-01') & date < as.Date('2015-09-01') & log(cog) < Inf & log(cog) > -Inf & rus == 0, ])
ptfe1.2a.nr <- felm(log(cog) ~ log(l1):nat + log(l1)  | tgbns | 0 | tgbns , 
	data=pt1[date >= as.Date('2012-01-01') & date < as.Date('2015-09-01') & log(cog) < Inf & log(cog) > -Inf & rus == 0, ])

## imports & domestic
ptfe1.1a <- felm(log(price) ~ log(l1):nat + log(l1) + log(l1):rus + log(l1):rus:nat | tgbns + tgbnr | 0 | tgbns + tgbnr, 
	data=pt1[date >= as.Date('2012-01-01') & date < as.Date('2015-09-01') & log(cog) < Inf & log(cog) > -Inf, ])
ptfe1.2a <- felm(log(cog) ~ log(l1):nat + log(l1) + log(l1):rus + log(l1):rus:nat | tgbns + tgbnr | 0 | tgbns + tgbnr, 
	data=pt1[date >= as.Date('2012-01-01') & date < as.Date('2015-09-01') & log(cog) < Inf & log(cog) > -Inf, ])

stargazer(ptfe1.1a.nr, ptfe1.2a.nr, ptfe1.1a, ptfe1.2a, type='text')

## OUTPUT
stargazer(ptfe1.1a.nr, ptfe1.2a.nr, ptfe1.1a, ptfe1.2a,
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(c('Quality $\\times$ Brand $\\times$ Group $\\times$ SoY FE', '\\checkmark', '\\checkmark', '\\checkmark','\\checkmark'),
					c('Brand $\\times$ Group $\\times$ Origin FE', '', '', '\\checkmark', '\\checkmark') 
					))


## ------------------------------------------------------------------
## USED LATER
##	price pass-through, by targetgroup
##	will be used in the counterfactual pass-through section


## checking if natural products have different pass-through
ptfe1.tg <- felm(log(price) ~ log(l1):nat:targetgroup + log(l1):targetgroup | tgbns | 0 | tgbns , 
			data=pt1[season >= as.Date('2012-01-01') & season < as.Date('2015-09-01') & rus == 0, ])

## assuming they don't have different pass-through
ptfe1b.tg <- felm(log(price) ~ log(l1):targetgroup | tgbns | 0 | tgbns, 
			data=pt1[season >= as.Date('2012-01-01') & season < as.Date('2015-09-01') & rus == 0, ])


## ---------------------------------------------------------
## PASS-THROUGH ROBUSTNESS: 
## 	change in coefficient of variation for price dispersion (and cost variation)

pt2 <- mdti[, by=c('season', 'brand', 'targetgroup', 'nat', 'rus', 'product_sku'), list(price=price[1], cog=cog[1])]	# [date == season]
pt2$mu <- pt2$price/pt2$cog
pt2 <- pt2[mu < Inf & mu > 0 & !is.na(mu), ]
pt2 <- pt2[, by=c('season', 'brand', 'targetgroup', 'nat', 'rus'), 
		list(sdmu=sd(mu,na.rm=T), sdp=sd(price,na.rm=T), sdcog=sd(cog,na.rm=T), 
			cvmu=sd(mu,na.rm=T)/mean(mu), cvp=sd(price,na.rm=T)/mean(price, na.rm=T), cvcog=sd(cog, na.rm=T)/mean(cog,na.rm=T))]
pt2$date <- pt2$season
pt2 <- merge(pt2, erdats[, -'price', with=F], by='season')


pt2$tgbn <- paste(pt2$targetgroup, pt2$brand, pt2$nat, sep='-')
pt2$tgbnr <- paste(pt2$targetgroup, pt2$brand, pt2$nat, sep='-')
pt2$tgbs <- paste(pt2$targetgroup, pt2$brand, pt2$date, sep='-')
pt2$tgbns <- paste(pt2$targetgroup, pt2$brand, pt2$nat, month(pt2$season), sep='-')

pt2$tgbnnr <- paste(pt2$targetgroup, pt2$brand, pt2$nat, pt2$nonrus, sep='-')
pt2$tgbdnr <- paste(pt2$targetgroup, pt2$brand, pt2$date, pt2$nonrus, sep='-')


## -- -- -- -- -- -- -- -- -- --
## APPENDIX TABLE B.15 
##	check if within-brand pricing dispersion changes post-shock
## 	use coefficient of variation instead of standard deviations. can log or not log, trim outliers or not, results go through

## IMPORTS ONLY
pt2fe1.1a.nr <- felm(cvp ~ log(l1):nat + log(l1) | tgbns | 0 | tgbns, 
	data=pt2[date >= as.Date('2012-03-01') & date < as.Date('2015-09-01') & rus == 0 & sdmu > 0 & sdp > 0, ])
pt2fe1.3a.nr <- felm(cvcog ~ log(l1):nat + log(l1) | tgbns | 0 | tgbns, 
	data=pt2[date >= as.Date('2012-03-01') & date < as.Date('2015-09-01') & rus == 0 & sdcog > 0 & sdp > 0, ])	

## IMPORTS AND DOMESTIC
pt2fe1.1a <- felm(cvp ~ log(l1):nat + log(l1) + log(l1):rus + log(l1):rus:nat | tgbns + tgbnr | 0 | tgbns, 
	data=pt2[date >= as.Date('2012-03-01') & date < as.Date('2015-09-01') & sdmu > 0 & sdp > 0, ])
pt2fe1.3a <- felm(cvcog ~ log(l1):nat + log(l1) + log(l1):rus + log(l1):rus:nat  | tgbns + tgbnr | 0 | tgbns, 
	data=pt2[date >= as.Date('2012-03-01') & date < as.Date('2015-09-01') & sdcog > 0 & sdp > 0, ])	

stargazer(pt2fe1.1a.nr, pt2fe1.3a.nr, pt2fe1.1a, pt2fe1.3a, type='text')

## OUTPUT	

stargazer(pt2fe1.1a.nr, pt2fe1.3a.nr, pt2fe1.1a, pt2fe1.3a,
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(c('Quality $\\times$ Brand $\\times$ Group $\\times$ SoY FE', '\\checkmark', '\\checkmark', '\\checkmark','\\checkmark'),
					c('Brand $\\times$ Group $\\times$ Origin FE', '', '', '\\checkmark', '\\checkmark') 
					))



## ===================================================================
## REDUCED FORM ANALYSIS - QUANTITIES AND EXPENDITURE ANALYSIS
## ===================================================================


## ---------------------------------------------------------
## APPENDIX TABLE B.17 AND B.18  
## ROBUSTNESS CHECKS
## 	TARGETGROUP X NAT X SEASON LEVEL AGGREGATION, IMPORTS ONLY

pt3 <- mdti[, by=c('season', 'targetgroup', 'nat', 'rus'), 
			list(price=sum(price*inv)/sum(inv), inv=sum(inv), exp=sum(price*inv))]
pt3$date <- pt3$season
pt3$nonrus <- 1 - pt3$rus
pt3$fseason <- as.factor(pt3$season)
pt3 <- merge(pt3, erdats[, -'price', with=F], by=c('season'))

pt3$tgn <- paste(pt3$targetgroup, pt3$nat, sep='-')
pt3$tgns <- paste(pt3$targetgroup, pt3$nat, month(pt3$date), sep='-')
pt3$tgd <- paste(pt3$targetgroup, pt3$date, sep='-')

## ROBUSTNESS - EXPENDITURES 
pt3fe1a <- felm(log(exp) ~ log(l1):nat | tgn + season | 0 | tgn, 
		data=pt3[nonrus == 1 & season >= as.Date('2012-03-01') & season <= as.Date('2015-09-01'), ])
pt3fe1b <- felm(log(exp) ~ log(l1):nat | tgn + tgd | 0 | tgn, 
		data=pt3[nonrus == 1 & season >= as.Date('2012-03-01') & season <= as.Date('2015-09-01'), ])
pt3fe1c <- felm(log(exp) ~ log(l1):nat | tgn + season | 0 | tgn, 
		data=pt3[nonrus == 1 & season >= as.Date('2012-03-01') & season < as.Date('2015-09-01'), ])
pt3fe1d <- felm(log(exp) ~ log(l1):nat | tgn + tgd | 0 | tgn, 
		data=pt3[nonrus == 1 & season >= as.Date('2012-03-01') & season < as.Date('2015-09-01'), ])

## ROBUSTNESS - QUANTITY RESULTS
pt3fe1a.q <- felm(log(inv) ~ log(l1):nat | tgn + season | 0 | tgn, 
		data=pt3[nonrus == 1 & season >= as.Date('2012-03-01') & season <= as.Date('2015-09-01'), ])
pt3fe1b.q <- felm(log(inv) ~ log(l1):nat | tgn + tgd | 0 | tgn, 
		data=pt3[nonrus == 1 & season >= as.Date('2012-03-01') & season <= as.Date('2015-09-01'), ])
pt3fe1c.q <- felm(log(inv) ~ log(l1):nat | tgn + season | 0 | tgn, 
		data=pt3[nonrus == 1 & season >= as.Date('2012-03-01') & season < as.Date('2015-09-01'), ])
pt3fe1d.q <- felm(log(inv) ~ log(l1):nat | tgn + tgd | 0 | tgn, 
		data=pt3[nonrus == 1 & season >= as.Date('2012-03-01') & season < as.Date('2015-09-01'), ])


## OUTPUT
## imports only robustness check on expenditures

stargazer(pt3fe1a, pt3fe1b, pt3fe1c, pt3fe1d, 
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(	c('Group $\\times$ Material FE', '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark'),
					c('Season FE', '\\checkmark', '', '\\checkmark', ''),
					c('Group $\\times$ Season FE', '', '\\checkmark', '', '\\checkmark')
					))

## OUTPUT
## imports only robstuness check on quantities

stargazer(pt3fe1a.q, pt3fe1b.q, pt3fe1c.q, pt3fe1d.q,
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(	c('Group $\\times$ Material FE', '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark'),
					c('Season FE', '\\checkmark', '', '\\checkmark', ''),
					c('Group $\\times$ Season FE', '', '\\checkmark', '', '\\checkmark')
					))




## ---------------------------------------------------------
## APPENDIX TABLE B.17 AND FIGURE B.6	
## TARGETGROUP X ORIGIN X SEASON LEVEL AGGREGATION, IMPORTS AND DOMESTIC
## 	differential EXPENDITURE switching for non-Russian stuff
##	THESE ARE THE BASELINE RESULTS

pt4 <- mdti[, by=c('season', 'rus', 'targetgroup'), list(qs=sum(inv*nat)/sum(inv), xs=sum(inv*nat*price)/sum(inv*price))]
pt4$nonrus <- 1 - pt4$rus
pt4$tgnr <- paste(pt4$targetgroup, pt4$nonrus)
pt4$tgnr_soy <- paste(pt4$targetgroup, pt4$nonrus, month(pt4$season), sep='-')


pt4$tgs <- paste(pt4$targetgroup, pt4$season)
pt4$fseason <- as.factor(pt4$season)
pt4 <- pt4[season >= as.Date('2011-03-01') & season <= as.Date('2015-09-01'), ]

pt4 <- merge(pt4, erdats[, -'price', with=F], by=c('season'))

pt4$s1 <- 1*(pt4$season == as.Date('2012-03-01'))
pt4$s2 <- 1*(pt4$season == as.Date('2012-09-01'))
pt4$s3 <- 1*(pt4$season == as.Date('2013-03-01'))
pt4$s4 <- 1*(pt4$season == as.Date('2013-09-01'))
pt4$s5 <- 1*(pt4$season == as.Date('2014-03-01'))
pt4$s6 <- 1*(pt4$season == as.Date('2014-09-01'))
pt4$s7 <- 1*(pt4$season == as.Date('2015-03-01'))
pt4$s8 <- 1*(pt4$season == as.Date('2015-09-01'))


## EXPENDITURES
pt4fe1a <- felm(xs ~ nonrus:log(l1) | season + tgnr | 0 | tgnr, 
		data=pt4[season >= as.Date('2012-03-01') & season <= as.Date('2015-09-01') , ])
pt4fe1b <- felm(xs ~ nonrus:log(l1) | tgs + tgnr | 0 | tgnr, 
		data=pt4[season >= as.Date('2012-03-01') & season <= as.Date('2015-09-01') , ])
pt4fe1c <- felm(xs ~ nonrus:log(l1) | season + tgnr | 0 | tgnr, 
		data=pt4[season >= as.Date('2012-03-01') & season < as.Date('2015-09-01') , ])
pt4fe1d <- felm(xs ~ nonrus:log(l1) | tgs + tgnr | 0 | tgnr, 
		data=pt4[season >= as.Date('2012-03-01') & season < as.Date('2015-09-01') , ])
stargazer(pt4fe1a, pt4fe1b, pt4fe1c, pt4fe1d, type='text')


## TIMING TEST
pt4fe1.tt <- felm(xs ~ nonrus:s2 + nonrus:s3 + nonrus:s4 + nonrus:s5 + nonrus:s6 + nonrus:s7 + nonrus:s8 | tgs + tgnr | 0 | tgnr_soy, 
		data=pt4[season >= as.Date('2012-03-01') & season <= as.Date('2015-09-01') , ])
pt4dt <- data.table(season=sort(unique(pt4$season[pt4$season <= as.Date('2015-09-01')  & pt4$season >= as.Date('2012-03-01')])) , 
				coef=c(0, coef(pt4fe1.tt)), se=c(NA, pt4fe1.tt$cse))

ggplot(data=pt4dt, aes(x=season, y=coef)) + geom_errorbar(aes(ymin=coef-1.96*se, ymax=coef+1.96*se), width=20) + geom_point() + 
		theme(axis.text.x = element_text(angle = 90, hjust = 1)) + theme_dan() + geom_hline(yintercept=0, linetype='dashed', color='red')


## OUTPUT
## APPENDIX TABLE B.17
##	differential expenditure switching regression output
## 	imports and domestic

stargazer(pt4fe1a, pt4fe1b, pt4fe1c, pt4fe1d, 	
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(	c('Group $\\times$ Origin FE', '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark'),
					c('Season FE', '\\checkmark', '', '\\checkmark', ''),
					c('Group $\\times$ Season FE', '', '\\checkmark', '', '\\checkmark')
					))

## OUTPUT
## APPENDIX FIGURE B.6
## imports and domestic - timing test

pdf('expenditure_reductions_timing_test.pdf', width=6, height=4)
ggplot(data=pt4dt[season >= as.Date('2012-03-01'), ], aes(x=season, y=coef)) + 
		geom_errorbar(aes(ymin=coef-1.96*se, ymax=coef+1.96*se), width=20) + geom_point() + 
		theme_dan() + geom_hline(yintercept=0, linetype='dashed', color='red') + ylab('Expenditure Share Reduction') + 
	theme(axis.title.x = element_blank()) 
dev.off()




## ===================================================================
## COUNTERFACTUAL PASS-THROUGH BASELINE
## ===================================================================

## predicted price increase given actual exchange rate
##	predicted price for SS2014, predicted share for SS2014
##	predicted price for SS2015, predicted share for SS2015

## predicted price increase given counterfactual exchange rate for number of products
##	predicted price for SS2014, predicted share for SS2014
##	predicted price for SS2015, predicted share for SS2015, GIVEN ALT EXCHANGE RATE

## -- -- -- -- -- -- -- -- -- --
## GROUP LEVEL

## -- -- -- -- --
## number of SKUs

ee3b_cf <- ee3b[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01'), ]
ee3b_cf <- ee3b_cf[order(season, rus, targetgroup), ]

fe4_cf <- felm(V1 ~ log(l1):nonrus:targetgroup | tgs + tgnr | 0 | tgs, data=ee3b_cf)

fe4_cf_coefs <- data.table(targetgroup = sort(unique(ee3b_cf$targetgroup)), coef = coef(fe4_cf))
ee3b_cf <- merge(ee3b_cf, fe4_cf_coefs, by='targetgroup')
ee3b_cf <- ee3b_cf[order(season, rus, targetgroup), ]

#a <- data.table(y=fe4_cf$response[,1], yhat=fe4_cf$fitted.values[,1])
#b <- cbind(ee3b_cf, a)


## actual
ee3b_cf$natfrac_hat <- fe4_cf$fitted.values


## counterfactual: seasonal adjustment tgs as in data
ee3b_cf$natfrac_hat_cf1 <- ee3b_cf$natfrac_hat - ee3b_cf$coef*log(ee3b_cf$l1) + 
								ee3b_cf$coef*log(erdats$l1[9])	

## counterfactual: no seasonal adjustment tgs - just predicted ratio at SS2014
ee3b_cf[, by=.(rus, targetgroup), c('natfrac_hat_cf2') := 
				list(natfrac_hat[season == as.Date('2014-03-01')])]

ee3b_cf <- ee3b_cf[rus == 0 & season %in% as.Date(c('2014-03-01', '2015-03-01')), .(targetgroup, season, 
						natfrac_hat, natfrac_hat_cf1, natfrac_hat_cf2)]
ee3b_cf$natfrac_hat_cf1 <- ifelse(ee3b_cf$natfrac_hat_cf1 <= 1, ee3b_cf$natfrac_hat_cf1, 1)

## -- -- -- -- --
## prices

pt1_cf <- mdti[, by=c('season', 'brand', 'targetgroup', 'nat', 'rus', 'product_sku'), 
			list(price=price[1], cog=cog[1], inv=inv[1], pi=sum((price - cog)*inv), rev=sum(price*inv))]
pt1_cf$date <- pt1_cf$season
pt1_cf$nonrus <- 1 - pt1_cf$rus

pt1_cf$tg <- pt1_cf$targetgroup
pt1_cf$tg_soy <- paste(pt1_cf$targetgroup, month(pt1_cf$season), sep='-')
pt1_cf$tgn_soy <- paste(pt1_cf$targetgroup, pt1_cf$nat, month(pt1_cf$season), sep='-')
pt1_cf$tgbns <- paste(pt1_cf$targetgroup, pt1_cf$nat, pt1_cf$brand, month(pt1_cf$season), sep='-')

pt1_cf <- merge(pt1_cf, erdats[,-'price',with=F], by=c('season'))
pt1_cf <- pt1_cf[date >= as.Date('2012-01-01') & date <= as.Date('2015-09-01') & log(cog) < Inf & log(cog) > -Inf & rus == 0, ]

cf_ppt_1 <- felm(log(price) ~ log(l1):tg + log(l1):nat:tg | tgn_soy | 0 | tgn_soy, data=pt1_cf)			## main regression
#cf_ppt_1 <- felm(log(price) ~ log(l1):tg | tgn_soy | 0 | tgn_soy, data=pt1_cf)					## main regression
cf_ppt_1.r <- felm(log(price) ~ log(l1) + log(l1):nat | tgn_soy | 0 | tgn_soy, data=pt1_cf)			## robustness for later

cf_ppt_2.nr <- felm(log(price) ~ log(l1):nat + log(l1)  | tgbns | 0 | tgbns , data=pt1_cf)


pt1_cf$phat <- exp(cf_ppt_1$fitted.values)												## predicted prices
pt1_cf$phat2 <- exp(cf_ppt_2.nr$fitted.values)												## predicted prices

pt1_cf <- pt1_cf[, by=.(targetgroup, season, nat), list(phat=min(phat), phat2 = mean(phat2,na.rm=T))]


#pt1_cf <- dcast(cf2b, targetgroup + season ~ nat, value.var='phat')
#pt1_cf$p1 <- pt1_cf$p2 <- NA
pt1_cf <- dcast(pt1_cf, targetgroup + season ~ nat, value.var=c('phat', 'phat2'))
setnames(pt1_cf, c('targetgroup', 'season', 'phat_m0', 'phat_m1', 'phat2_m0', 'phat2_m1'))

## -- -- -- -- --
## combined

dt_cf <- merge(pt1_cf, ee3b_cf, by=c('targetgroup', 'season'))
dt_cf <- merge(dt_cf, erdats[, .(season, l1)], by='season')


## using natfrac

dt_cf$pbar <- (dt_cf$natfrac_hat*dt_cf$phat_m1 + (1 - dt_cf$natfrac_hat)*dt_cf$phat_m0)
dt_cf$pbar_cf1 <- (dt_cf$natfrac_hat_cf1*dt_cf$phat_m1 + (1 - dt_cf$natfrac_hat_cf1)*dt_cf$phat_m0)
dt_cf$pbar_cf2 <- (dt_cf$natfrac_hat_cf2*dt_cf$phat_m1 + (1 - dt_cf$natfrac_hat_cf2)*dt_cf$phat_m0)

dt_cf$pbar2 <- (dt_cf$natfrac_hat*dt_cf$phat2_m1 + (1 - dt_cf$natfrac_hat)*dt_cf$phat2_m0)
dt_cf$pbar2_cf1 <- (dt_cf$natfrac_hat_cf1*dt_cf$phat2_m1 + (1 - dt_cf$natfrac_hat_cf1)*dt_cf$phat2_m0)
dt_cf$pbar2_cf2 <- (dt_cf$natfrac_hat_cf2*dt_cf$phat2_m1 + (1 - dt_cf$natfrac_hat_cf2)*dt_cf$phat2_m0)

out <- dt_cf[, by=.(targetgroup), list(act = (pbar[2]/pbar[1] - 1)/(l1[2]/l1[1] - 1), 
						cfac1 = (pbar_cf1[2]/pbar[1] - 1)/(l1[2]/l1[1] - 1), 
						cfac2 = (pbar_cf2[2]/pbar[2] - 1)/(l1[2]/l1[1] - 1), 
						act2 = (pbar2[2]/pbar2[1] - 1)/(l1[2]/l1[1] - 1), 
						cfac2_1 = (pbar2_cf1[2]/pbar2[1] - 1)/(l1[2]/l1[1] - 1), 
						cfac2_2 = (pbar2_cf2[2]/pbar2[2] - 1)/(l1[2]/l1[1] - 1))]

out <- out[order(-act2), ]
#plot(out$act2)
#points(out$cfac2_1, col='red')


## -- -- -- -- -- 
## FIGURE 3  
##	OUTPUT OF COUNTERFACTUAL PASS-THROUGH

out <- rbind(out, data.table(targetgroup = 'AVERAGE', act2 = mean(out$act2), cfac2_1 = mean(out$cfac2_1)), fill=T)
ordered_tg <- out$targetgroup[order(-out$act2)]

outl <- melt(out, id.vars = 'targetgroup', measure.vars=c('act2', 'cfac2_1'))
outl$variable <- ifelse(outl$variable == 'act2', 'Actual', 'Counterfactual')

###### bold product groups that comprise 80%+ of total sales
temp <- mdti[season == as.Date('2014-03-01'), by=.(targetgroup), list(sales = sum(inv*price))]
temp[, 'share' := list(sales/sum(sales))]
temp <- temp[order(-share), ]
temp$csshare <- cumsum(temp$share)
vec_fontface <- rep(c(ifelse(ordered_tg %in% temp$targetgroup[temp$csshare <= 0.8], "bold", "plain"), "plain"), 2)
######

p <- ggplot(outl, aes(x=targetgroup, y = value, group = variable, shape = variable)) + geom_point() + 
		theme_dan() + ylab('Pass through') + theme(axis.title.x = element_blank()) + theme(axis.text.x = element_text(angle=45,hjust=1)) + 
		scale_x_discrete(limits=ordered_tg) + scale_shape_manual(values=c(1,4)) + 
		theme(axis.text.x=element_text(face=vec_fontface)) + geom_vline(xintercept=which(ordered_tg == 'AVERAGE'), linetype='dotted')	

ggsave('REStat_targetgroup_counterfactual_passthrough_greyscale.pdf', p, height=4, width=6)
dev.off()


## ===================================================================
## COUNTERFACTUAL PASS-THROUGH, FLEXIBLE ROBUSTNESS CHECK
## ===================================================================


## -- -- --
## number of skus

targetgroups <- sort(unique(rob1$targetgroup))
numtg <- length(targetgroups)

cf2a <- rob1[season >= as.Date('2012-01-01') & season < as.Date('2015-09-01') & nonrus == 1, ]
cf2a <- cf2a[order(season, targetgroup, nat), ]
rob1b.nosb <- felm(log(N) ~ log(l1):nat:targetgroup + log(l1):targetgroup + t:targetgroup | tgnm | 0 | tgnm, data=cf2a)

cf2a$nhat <- rob1b.nosb$fitted.values

fER <- unique(rob1$l1[rob1$season == as.Date('2014-09-01')])

cf2a$nhat_cfac <- cf2a$nhat - (cf2a$season >= as.Date('2014-09-01'))*(
				(coef(rob1b.nosb)[1:numtg]*log(cf2a$l1) + coef(rob1b.nosb)[(2*numtg + 1):(3*numtg)]*cf2a$nat*log(cf2a$l1)) - 
				(coef(rob1b.nosb)[1:numtg]*log(fER) + coef(rob1b.nosb)[(2*numtg + 1):(3*numtg)]*cf2a$nat*log(fER))
	)
cf2a <- cf2a[, .(season, nat, targetgroup, nhat, nhat_cfac)]

## -- -- --
## prices

pt1.t <- pt1[date >= as.Date('2012-01-01') & date < as.Date('2015-09-01') & log(cog) < Inf & log(cog) > -Inf & rus == 0, ]
a_tg <- felm(log(price) ~ log(l1):targetgroup + log(l1):nat:targetgroup | tgbns | 0 | tgbns, data=pt1.t)
#a <- felm(log(price) ~ log(l1):targetgroup | tgns | 0 | tgns, data=pt1.t)

cf2b <- pt1.t
cf2b$phat <- exp(a_tg$fitted.values)
cf2b <- cf2b[, by=.(season, targetgroup, nat), list(price = mean(price), phat = mean(phat))]


## -- -- --
## together
## merged data: by date-targetgroup-nat

cf2 <- merge(cf2a, cf2b, by=c('season', 'targetgroup', 'nat'))
cf2$Nhat <- exp(cf2$nhat)
cf2$Nhat_cfac <- exp(cf2$nhat_cfac)

cf2out <- cf2[, by=.(targetgroup, season), list(act = sum(Nhat*phat)/sum(Nhat), cfac = sum(Nhat_cfac*phat)/sum(Nhat_cfac))]

erinc <- erdats$l1[erdats$season == as.Date('2015-03-01')]/erdats$l1[erdats$season == as.Date('2014-03-01')] - 1

cf2out <- cf2out[season %in% as.Date(c('2015-03-01', '2014-03-01')), by=.(targetgroup), 
		list(act = (act[season == max(season)]/act[season==min(season)] - 1)/erinc,
			cfac = (cfac[season == max(season)]/cfac[season==min(season)] - 1)/erinc)]

#cf2out <- rbind(cf2out, data.table(targetgroup='AVERAGE', act=act_avg/erinc, cfac=cfac_avg/erinc))
cf2out <- rbind(cf2out, data.table(targetgroup='AVERAGE', act=mean(cf2out$act), cfac=mean(cf2out$cfac)))
ordered_tg <- cf2out$targetgroup[order(-cf2out$act)]

cf2outl <- melt(cf2out, id.vars=c('targetgroup'), measure.vars =c('act', 'cfac'))
cf2outl$variable <- ifelse(cf2outl$variable == 'act', 'Actual', 'Counterfactual')

###### bold product groups that comprise 80%+ of total sales
temp <- mdti[season == as.Date('2014-03-01'), by=.(targetgroup), list(sales = sum(inv*price))]
temp[, 'share' := list(sales/sum(sales))]
temp <- temp[order(-share), ]
temp$csshare <- cumsum(temp$share)
vec_fontface <- rep(c(ifelse(ordered_tg %in% temp$targetgroup[temp$csshare <= 0.8], "bold", "plain"), "plain"), 2)
######

p <- ggplot(cf2outl, aes(x=targetgroup, y = value, group = variable, color = variable, shape = variable)) + geom_point() + 
		theme_dan() + ylab('Pass through') + theme(axis.title.x = element_blank()) + theme(axis.text.x = element_text(angle=45,hjust=1)) + 
		scale_x_discrete(limits=ordered_tg) + scale_shape_manual(values=c(1,4)) + 
		theme(axis.text.x=element_text(face=vec_fontface)) + 
		geom_vline(xintercept=which(ordered_tg == 'AVERAGE'), linetype='dotted')	

ggsave('targetgroup_counterfactual_passthrough_robustness.pdf', p, height=4, width=6)
dev.off()


## -- -- -- -- -- -- -- -- -- --
## stargazer output
##	do the regressions group by group for easier formatting in the final table


## -- -- --
## number of SKUs

## need (1) data and (2) model


cf2a <- rob1[season >= as.Date('2012-01-01') & season < as.Date('2015-09-01') & nonrus == 1, ]
#cf2a <- cf2a[order(season, targetgroup, nat), ]
#rob1b.nosb <- felm(log(N) ~ log(l1):nat:targetgroup + log(l1):targetgroup + t:targetgroup | tgnm | 0 | tgnm, data=cf2a)

targetgroups <- sort(unique(cf2a$targetgroup))
rob1b.nosb.l <- lapply(1:length(targetgroups), function(x) felm(log(N) ~ log(l1):nat + log(l1) + t | tgnm , data=cf2a[targetgroup==targetgroups[x], ]))

## split output into two tables, otherwise it's too wide even with sideways table

stargazer(rob1b.nosb.l[1:13],  
	float.env='sidewaystable',
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(	c('Group $\\times$ Quality $\\times$ SoY', rep('\\checkmark', 13))))

stargazer(rob1b.nosb.l[14:length(rob1b.nosb.l)],
	float.env='sidewaystable',
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(	c('Group $\\times$ Quality $\\times$ SoY', rep('\\checkmark', 13))))

## -- -- --
## prices

pt1.t <- pt1[date >= as.Date('2012-01-01') & date < as.Date('2015-09-01') & log(cog) < Inf & log(cog) > -Inf & rus == 0, ]
a <- felm(log(price) ~ log(l1) + log(l1):nat | tgbns | 0 | tgbns, data=pt1.t)

a_tg.l <- lapply(1:length(targetgroups), function(x) felm(log(price) ~ log(l1) + log(l1):nat | tgbns | 0 | tgbns, data=pt1.t[targetgroup==targetgroups[x],]))


## split output into two tables, otherwise it's too wide even with sideways table
stargazer(a_tg.l[1:13],  
	float.env='sidewaystable',
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(	c('Brand $\\times$ Group $\\times$ Quality $\\times$ SoY', rep('\\checkmark', 14))))


stargazer(a_tg.l[14:26],
	float.env='sidewaystable',
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(	c('Brand $\\times$ Group $\\times$ Quality $\\times$ SoY', rep('\\checkmark', 14))))

## for tablenotes
paste(proper(targetgroups[1:13]), collapse=', ')
paste(proper(targetgroups[14:26]), collapse=', ')



## ===============================================================================
## COMPLETE ROBUSTNESS - REDUCED FORM FACTS REPLICATED WITH SUBSET OF PRODUCTS
## ===============================================================================

## replicate results for products which have materials of "plausibly higher quality"
## use the group-by-group Khandelwal (2010) regressions to differentiate

## positive demand coefficients
subtgs <- c('UNDERWEAR', 'HIGH BOOTS', 'HEELED SANDALS', 'MOCCASINS & ESPADRILLES', 'SHOES', 'ANKLE BOOTS',
						'SHIRTS', 'BALLERINA SHOES', 'SANDALS', 'HEADWEAR', 'TROUSERS & JUMPSUITS', 'BOOTS',
						'DRESSES', 'KNITWEAR', 'TEE-SHIRTS & POLOS', 'SHORTS', 'SPORT SHOES', 'SCARVES', 'BAGS')

## -- -- -- -- -- -- -- -- -- --
## FACT 1: PROFIT DIFFERENTIAL

cr.pcs.fe1c <- felm(lpi ~ nat | tgs | 0 | tgs , data=pcs[lpi > -Inf & lcog_n > -Inf & targetgroup %in% subtgs,])
cr.pcs.fe2c <- felm(lrev ~ nat | tgs   | 0 | tgs , data=pcs[lpi > -Inf & lcog_n > -Inf  & targetgroup %in% subtgs,])
cr.pcs.fe3c <- felm(lq ~ nat | tgs  | 0 | tgs, data=pcs[lpi > -Inf & lcog_n > -Inf  & targetgroup %in% subtgs ,])
cr.pcs.fe4c <- felm(lp ~ nat | tgs  | 0 | tgs, data=pcs[lpi > -Inf & lcog_n > -Inf  & targetgroup %in% subtgs ,])
cr.pcs.fe5c <- felm(lcog_n ~ nat | tgs  | 0 | tgs, data=pcs[lpi > -Inf & lcog_n > -Inf  & targetgroup %in% subtgs ,])
cr.pcs.fe6c <- felm(lmu ~ nat | tgs | 0 | tgs, data=pcs[lpi > -Inf & lcog_n > -Inf  & targetgroup %in% subtgs, ])

stargazer(cr.pcs.fe1c, cr.pcs.fe2c, cr.pcs.fe3c, cr.pcs.fe4c, cr.pcs.fe5c, cr.pcs.fe6c, type='text')


## -- -- -- -- -- -- -- -- -- --
## FACT 2: QUALITY DOWNGRADING

cr.fe3b <- felm(V1 ~ nonrus:log(l1) | tgnr + season | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01') & targetgroup %in% subtgs, ])
cr.fe3b2 <- felm(V1 ~ nonrus:log(l1) | tgnr + tgs | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01') & targetgroup %in% subtgs, ])
cr.fe3b3 <- felm(log(V3) ~ nonrus:log(l1) | tgnr + season | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01') & targetgroup %in% subtgs, ])
cr.fe3b4 <- felm(log(V3) ~ nonrus:log(l1) | tgnr + tgs | 0 | tgnr, 
			data=ee3b[season >= as.Date('2012-01-01') & season <= as.Date('2015-09-01') & targetgroup %in% subtgs, ])

stargazer(cr.fe3b, cr.fe3b2, cr.fe3b3, cr.fe3b4, type='text')


## -- -- -- -- -- -- -- -- -- --
## FACT 3A: NO DIFFERENTIAL PRICE PASS-THROUGH

## IMPORTS ONLY
cr.ptfe1.1a.nr <- felm(log(price) ~ log(l1):nat + log(l1)  | tgbns | 0 | tgbns , 
	data=pt1[date >= as.Date('2012-01-01') & date < as.Date('2015-09-01') & log(cog) < Inf & log(cog) > -Inf & rus == 0 & targetgroup %in% subtgs, ])
cr.ptfe1.3a.nr <- felm(log(cog) ~ log(l1):nat + log(l1)  | tgbns | 0 | tgbns , 
	data=pt1[date >= as.Date('2012-01-01') & date < as.Date('2015-09-01') & log(cog) < Inf & log(cog) > -Inf & rus == 0 & targetgroup %in% subtgs, ])

## IMPORTS AND DOMESTIC, ALLOWING DIFFERENTIAL PASS-THROUGH FOR RUSSIAN x NATURAL
cr.ptfe1.1aa <- felm(log(price) ~ log(l1):nat + log(l1) + log(l1):rus + log(l1):rus:nat | tgbns + tgbnr | 0 | tgbns + tgbnr, 
	data=pt1[date >= as.Date('2012-01-01') & date < as.Date('2015-09-01') & log(cog) < Inf & log(cog) > -Inf & targetgroup %in% subtgs, ])
cr.ptfe1.3aa <- felm(log(cog) ~ log(l1):nat + log(l1) + log(l1):rus + log(l1):rus:nat | tgbns + tgbnr | 0 | tgbns + tgbnr, 
	data=pt1[date >= as.Date('2012-01-01') & date < as.Date('2015-09-01') & log(cog) < Inf & log(cog) > -Inf & targetgroup %in% subtgs, ])

stargazer(cr.ptfe1.1a.nr, cr.ptfe1.3a.nr, cr.ptfe1.1aa, cr.ptfe1.3aa, type='text')


## FACT 3B: EXPENDITURE SWITCHING?
cr.pt4fe1a <- felm(xs ~ nonrus:log(l1) | season + tgnr | 0 | tgnr, 
		data=pt4[season >= as.Date('2012-03-01') & season <= as.Date('2015-09-01') & targetgroup %in% subtgs, ])
cr.pt4fe1b <- felm(xs ~ nonrus:log(l1) | tgs + tgnr | 0 | tgnr, 
		data=pt4[season >= as.Date('2012-03-01') & season <= as.Date('2015-09-01') & targetgroup %in% subtgs, ])
cr.pt4fe1c <- felm(xs ~ nonrus:log(l1) | season + tgnr | 0 | tgnr, 
		data=pt4[season >= as.Date('2012-03-01') & season < as.Date('2015-09-01') & targetgroup %in% subtgs, ])
cr.pt4fe1d <- felm(xs ~ nonrus:log(l1) | tgs + tgnr | 0 | tgnr, 
		data=pt4[season >= as.Date('2012-03-01') & season < as.Date('2015-09-01') & targetgroup %in% subtgs, ])

stargazer(cr.pt4fe1a, cr.pt4fe1b, cr.pt4fe1c, cr.pt4fe1d, type='text')


## ------------------------------------------------------------------------
## OUTPUT

## FACT 1

stargazer(cr.pcs.fe1c, cr.pcs.fe2c, cr.pcs.fe3c, cr.pcs.fe4c, cr.pcs.fe5c, cr.pcs.fe6c,
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(c('Group $\\times$ Season FE',  '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark')))

## FACT 2

stargazer(cr.fe3b, cr.fe3b2, cr.fe3b3, cr.fe3b4,
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(	c('Group $\\times$ Origin FE', '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark'),
					c('Season FE', '\\checkmark', '', '\\checkmark', ''),
					c('Group $\\times$ Season FE', '', '\\checkmark', '', '\\checkmark')
					))


## FACT 3A

stargazer(cr.ptfe1.1a.nr, cr.ptfe1.3a.nr, cr.ptfe1.1aa, cr.ptfe1.3aa,
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(c('Quality $\\times$ Brand $\\times$ Group $\\times$ SoY FE', '\\checkmark', '\\checkmark', '\\checkmark','\\checkmark'),
					c('Brand $\\times$ Group $\\times$ Origin FE', '', '', '\\checkmark', '\\checkmark') 
					))


## FACT 3B

stargazer(cr.pt4fe1a, cr.pt4fe1b, cr.pt4fe1c, cr.pt4fe1d, 	
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(	c('Group $\\times$ Origin FE', '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark'),
					c('Season FE', '\\checkmark', '', '\\checkmark', ''),
					c('Group $\\times$ Season FE', '', '\\checkmark', '', '\\checkmark')
					))






#.